Loading compiled data and bootstrapping the Python environment


In [3]:
# Execute the shared configuration notebook from within this kernel.
# NOTE(review): later cells use `pd`, `plt` and `list_types` without defining
# them here — presumably they are provided by this config notebook; confirm.
%run "0. config.ipynb"


https://api.redmetrics.io/v1/event.csv?game=0fe53936-027f-461c-b087-b5b5737b2163&gameVersion=51b8a78a-8dd3-4a5e-9f41-01e6805e0f52&entityType=event
1866

In [101]:
# DataFrame.from_csv is deprecated (removed in pandas 1.0); read_csv with
# index_col=0 is its documented replacement. The first CSV column becomes the
# index (playerId). from_csv also tried to parse the index as dates, which is
# not needed for an index of player identifiers.
players_stats = pd.read_csv("data/players_stats.csv", index_col=0, encoding="utf8")

In [102]:
# Preview the first five rows of the loaded table.
players_stats.head(n=5)


Out[102]:
complete configure craft death equip gotomooc gotourl pickup reach restart selectmenu start switch unequip duration (seconds) section
playerId
0023dbb1-7f98-4cdb-8122-722f801f40b3 0 1 0 3 0 0 0 1 2 0 1 0 0 0 175 2
01b0c435-f0c0-4bfd-9189-86fc0d29b163 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
02bc076b-32aa-467a-bbc6-b746abedb7bd 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
02c6953a-0417-4858-8efb-1989be9f6b9d 0 1 0 0 0 0 0 0 1 0 2 1 1 0 19 1
0306db66-081d-4035-b30f-8358469d6ec3 1 0 3 17 8 0 1 5 12 1 2 0 0 2 1030 8

PCA YOLO


In [103]:
from sklearn.decomposition import PCA

In [104]:
# Column-wise average over all rows of the table.
players_stats.mean(axis=0)


Out[104]:
complete                0.156028
configure               0.205674
craft                   0.312057
death                   4.198582
equip                   1.347518
gotomooc                0.007092
gotourl                 0.070922
pickup                  1.049645
reach                   3.425532
restart                 0.014184
selectmenu              1.184397
start                   0.680851
switch                  0.191489
unequip                 0.390071
duration (seconds)    570.815603
section                 2.773050
dtype: float64

In [94]:
# Project every row of players_stats onto its first two principal components
# and draw the result as a scatter plot.
X = players_stats.values  # as_matrix() is deprecated/removed; .values is the equivalent
n = X.shape[0]

# NOTE(review): the columns are fed to the PCA unscaled, so a column with a much
# larger variance (presumably "duration (seconds)") can dominate both
# components — confirm that this is intended.
pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)
X_vr = pca.explained_variance_ratio_

# Colour map shared with the later per-subset plots.
cmap = plt.get_cmap("YlOrRd")

# Share of the total variance retained by the two components.
# Parenthesised form works as a statement on Python 2 and as a call on Python 3.
print(sum(X_vr))

# Marker size: duration expressed in units of its standard deviation, scaled by
# 40 with a floor of 10. Marker colour: the "section" column divided by 8.
X_std = 10 + 40 * players_stats["duration (seconds)"] / players_stats["duration (seconds)"].std()
X_color = cmap(players_stats["section"]/8)

plt.figure()
plt.scatter(X_r[:, 0], X_r[:, 1], s=X_std, c=X_color, alpha=.6, lw=0)
plt.title("all players (n = %s)" % (n))
plt.xlabel("first principal component")
plt.ylabel("second principal component")
# plt.legend() was dropped: no artist carries a label, so there is nothing to list.


0.999987828303

In [96]:
# One PCA scatter plot per entry of list_types, restricted to the rows whose
# value in that column is strictly positive. X_std (marker sizes) and cmap
# (colour map) are expected to exist already from an earlier cell.
for title in list_types:
  df = players_stats[players_stats[title] > 0]

  X = df.values  # as_matrix() is deprecated/removed; .values is the equivalent
  n = X.shape[0]

  # A two-component projection is only attempted with at least two rows.
  if n > 1:
    pca = PCA(n_components=2)
    X_r = pca.fit(X).transform(X)

    # Pick the marker sizes of the selected rows by index label.
    d = X_std.loc[df.index]
    c = cmap(df["section"]/8)

    plt.figure()
    plt.scatter(X_r[:, 0], X_r[:, 1], s=d, c=c, alpha=.6, lw=0)
    plt.title("%s > 0 (n = %s)" % (title, n))
    plt.xlabel("first principal component")
    plt.ylabel("second principal component")
    # plt.legend() was dropped: no artist carries a label, so there is nothing to list.
  else:
    # Parenthesised form works as a statement on Python 2 and as a call on Python 3.
    print("we skipped %s because there is no enough data" % title)

plt.show();


we skipped gotomooc because there is no enough data

In [17]:
# Histogram of the "reach" column over 20 bins; the returned
# (counts, bin edges, patches) tuple is the cell output.
plt.hist(players_stats.loc[:, "reach"], bins=20)


Out[17]:
(array([ 64.,  27.,   3.,   8.,   9.,  22.,   5.,   1.,   0.,   0.,   0.,
          1.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   1.]),
 array([  0. ,   1.6,   3.2,   4.8,   6.4,   8. ,   9.6,  11.2,  12.8,
         14.4,  16. ,  17.6,  19.2,  20.8,  22.4,  24. ,  25.6,  27.2,
         28.8,  30.4,  32. ]),
 <a list of 20 Patch objects>)

In [92]:
# One PCA scatter plot per value 0..8 of the "section" column. X_std (marker
# sizes) is expected to exist already from an earlier cell.
for checkpoint in range(9):
  df = players_stats[players_stats["section"] == checkpoint]

  X = df.values  # as_matrix() is deprecated/removed; .values is the equivalent
  n = X.shape[0]

  # A two-component projection is only attempted with at least two rows.
  if n > 1:
    pca = PCA(n_components=2)
    X_r = pca.fit(X).transform(X)

    # Pick the marker sizes of the selected rows by index label.
    d = X_std.loc[df.index]

    # No per-point colour here: every row in the subset has the same "section"
    # value, so the previously computed (and unused) colour array was removed.
    plt.figure()
    plt.scatter(X_r[:, 0], X_r[:, 1], s=d, alpha=.6, lw=0)
    plt.title("checkpoint = %s (n = %s)" % (checkpoint, n))
    plt.xlabel("first principal component")
    plt.ylabel("second principal component")
    # plt.legend() was dropped: no artist carries a label, so there is nothing to list.
  else:
    # Parenthesised form works as a statement on Python 2 and as a call on Python 3.
    print("we skipped %s because there is no enough data" % checkpoint)

plt.show();


we skipped 5 because there is no enough data